# -*- coding: utf-8 -*-
"""
Created on Sat Jun  4 10:51:32 2022
完成版，提交用
@author: LiTao
"""

import time
from contextlib import ExitStack

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait


# Long-running, periodic crawl of the Bilibili search results.

SEARCH_URL = 'https://search.bilibili.com/all?keyword=心理健康'
ROUNDS = 14            # how many times the whole crawl is repeated
ROUND_INTERVAL = 1800  # seconds slept between two consecutive rounds
EXTRA_PAGES = 40       # result pages visited after the first one
CARDS_PER_PAGE = 20    # video cards read from every result page
PAGE_TIMEOUT = 60      # seconds to wait for a button to become clickable

# CSS selectors, split into a shared prefix plus the part that differs.
_ROOT = '#i_cecream > div:nth-child(1) > div:nth-child(1)'
SORT_BUTTON = _ROOT + ' > div.search-header > div:nth-child(4) > div > div.conditions-order.flex_between > div > button:nth-child(3)'
NEXT_PAGE_BUTTON = _ROOT + ' > div.search-content > div > div > div.flex_center.mt_x50.mb_lg > div > div > button:nth-child(11)'
# "{}" is filled with the 1-based position of the card inside the result grid.
CARD = _ROOT + ' > div.search-content > div > div > div.video-list.row > div:nth-child({}) > div > div.bili-video-card__wrap.__scale-wrap'

# (output file, selector relative to CARD) for every scraped field.
# The six files are parallel: line k of each file describes the same video.
FIELDS = (
    ('心理健康ti.txt', ' > div > div > a > h3'),  # video title
    ('心理健康p.txt', ' > a > div > div.bili-video-card__mask > div > div > span:nth-child(1)'),  # first counter; original comment: play count
    ('心理健康a.txt', ' > a > div > div.bili-video-card__mask > div > div > span:nth-child(2)'),  # second counter; original comment: like count
    ('心理健康t.txt', ' > a > div > div.bili-video-card__mask > div > span'),  # original comment: "time" (presumably the duration label)
    ('心理健康n.txt', ' > div > div > p > a > span.bili-video-card__info--author'),  # uploader name
    ('心理健康d.txt', ' > div > div > p > a > span.bili-video-card__info--date'),  # publish date
)


def _read_card(browser, index):
    """Return the text of every field in FIELDS for the index-th card.

    Raises:
        NoSuchElementException: if any of the field elements is missing.
    """
    card = CARD.format(index)
    return [
        browser.find_element(by=By.CSS_SELECTOR, value=card + suffix).text
        for _, suffix in FIELDS
    ]


def _scrape_page(browser, outputs, page):
    """Append every complete card of the current page to the output files.

    Args:
        browser: the WebDriver showing a search result page.
        outputs: open text files, in the same order as FIELDS.
        page: 1-based page number, only used in the progress message.
    """
    skipped = 0
    for index in range(1, CARDS_PER_PAGE + 1):
        time.sleep(0.1)
        try:
            # Read all six fields before writing anything, so that a card
            # with a missing element never leaves the files misaligned.
            values = _read_card(browser, index)
        except NoSuchElementException:
            # Short last page or a card with a different layout: skip it.
            skipped += 1
            continue
        for out, value in zip(outputs, values):
            out.write('\n' + str(value))
    if skipped:
        print('第' + str(page) + '页有' + str(skipped) + '个视频信息不完整，已跳过')


def _crawl_once():
    """Run one crawl: open the search page, re-sort it and walk the pages.

    The browser and the output files are always released, even when the
    crawl is interrupted by an exception.
    """
    # Headless Chrome, no visible window.
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--headless")
    browser = webdriver.Chrome(options=chrome_options)
    try:
        wait = WebDriverWait(browser, PAGE_TIMEOUT)
        browser.get(SEARCH_URL)

        # Switch the ordering with the third sort button (the original
        # comment calls it "most clicks"). Waiting for it avoids racing
        # the page render.
        sort_locator = (By.CSS_SELECTOR, SORT_BUTTON)
        wait.until(EC.element_to_be_clickable(sort_locator)).click()

        with ExitStack() as stack:
            outputs = [
                stack.enter_context(open(path, "a", encoding="utf-8"))
                for path, _ in FIELDS
            ]
            time.sleep(1)
            _scrape_page(browser, outputs, 1)

            next_locator = (By.CSS_SELECTOR, NEXT_PAGE_BUTTON)
            for page in range(2, EXTRA_PAGES + 2):
                # Scroll to the bottom so the pager is in view.
                browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                try:
                    next_button = wait.until(EC.element_to_be_clickable(next_locator))
                except TimeoutException:
                    # No usable "next page" button: end this round cleanly.
                    print('未找到下一页按钮，本轮爬取结束')
                    break
                next_button.click()
                print('翻到第' + str(page) + '页')  # progress: page now shown
                browser.refresh()
                time.sleep(1)  # small pause to look less like a bot
                _scrape_page(browser, outputs, page)
    finally:
        browser.quit()


def main():
    """Repeat the crawl ROUNDS times, sleeping ROUND_INTERVAL in between."""
    for round_no in range(ROUNDS):
        _crawl_once()
        print("ok")          # marks the end of a round
        print(time.ctime())  # when the round finished
        # Sleeping after the final round would only delay the exit.
        if round_no < ROUNDS - 1:
            time.sleep(ROUND_INTERVAL)


# Running the file as a script behaves as before; importing it no longer
# starts the crawl.
if __name__ == "__main__":
    main()

